Read in data

ca_dgw <- read_sf(here("ca_dgw"), layer = "F2013_DBGS_Points_20150720_093252") %>%
  clean_names()

# some negative values which means well is above sea level

# check the projections
st_crs(ca_dgw) # wgs 84
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]
# read in CA county data
ca_counties <- read_sf(here("ca_counties"), layer = "CA_Counties_TIGER2016") %>% 
  clean_names() %>% 
  select(name) # geometry is sticky so it still shows up here

# check the projections
st_crs(ca_counties)
## Coordinate Reference System:
##   User input: WGS 84 / Pseudo-Mercator 
##   wkt:
## PROJCRS["WGS 84 / Pseudo-Mercator",
##     BASEGEOGCRS["WGS 84",
##         DATUM["World Geodetic System 1984",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4326]],
##     CONVERSION["Popular Visualisation Pseudo-Mercator",
##         METHOD["Popular Visualisation Pseudo Mercator",
##             ID["EPSG",1024]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["False easting",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["unknown"],
##         AREA["World - 85°S to 85°N"],
##         BBOX[-85.06,-180,85.06,180]],
##     ID["EPSG",3857]]
ca_counties <- st_transform(ca_counties, st_crs(ca_dgw)) # can list the number or the crs from another sf object

# make a quick plot

ggplot() +
  geom_sf(data = ca_counties) +
  geom_sf(data = ca_dgw, aes(color = dgbs))

map interactively to explore furthur

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ca_dgw) +
  tm_dots("dgbs") # dots means its a point plot
## Variable(s) "dgbs" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

san joaquin county

# only choose sj county
sj_county <-  ca_counties %>% 
  filter(name == "San Joaquin")

# keep obs for groundwater depth within that county
sj_depth <- ca_dgw %>% 
  st_intersection(sj_county) # intersection between dgw data and sj county
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plot(sj_depth)
## Warning: plotting the first 10 out of 18 attributes; use max.plot = 18 to plot
## all

plot(sj_county)

ggplot() +
  geom_sf(data = sj_county) +
  geom_sf(data = sj_depth, aes(color = dgbs))

# take a look at what patterns we see before any analysis
# looks like smaller ones are in the western part, while the depths increases as we move east
# not changing this data want to predcit what its gonna look like

check for duplicates aka spatial singularties

cant have 2 observations at the same exact location with different values

can get rid of duplicates, or get the mean values of them

well_duplicates <- sj_depth %>% 
  get_dupes(latitude, longitude) # wow what a function! check for duplicates

well_duplicates
## Simple feature collection with 4 features and 19 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -121.26 ymin: 38.04 xmax: -121.25 ymax: 38.04
## geographic CRS: WGS 84
## # A tibble: 4 x 20
##   latitude longitude dupe_count site_code local_well state_well wcr_number
##      <dbl>     <dbl>      <int> <chr>     <chr>      <chr>      <chr>     
## 1     38.0     -121.          2 380400N1… 02N06E01H… 02N06E01H… <NA>      
## 2     38.0     -121.          2 380400N1… 02N06E01H… 02N06E01H… <NA>      
## 3     38.0     -121.          2 380400N1… 02N07E07G… 02N07E07G… <NA>      
## 4     38.0     -121.          2 380400N1… 02N07E07G… 02N07E07G… <NA>      
## # … with 13 more variables: well_use <dbl>, msmt_date <date>, msmt_agenc <dbl>,
## #   wsel <dbl>, dgbs <dbl>, rp_elevati <dbl>, gs_elevati <dbl>,
## #   msmt_metho <dbl>, msmt_issue <dbl>, msmt_comme <chr>, link_to_wd <chr>,
## #   name <chr>, geometry <POINT [°]>
# in the future probably take the average between the dupliocates

sj_depth <- sj_depth %>% 
  filter(!local_well %in% well_duplicates$local_well)  # get rid of dupes

sj_depth %>% 
  get_dupes(latitude, longitude) # no more dupes!
## No duplicate combinations found of: latitude, longitude
## Simple feature collection with 0 features and 19 fields
## bbox:           xmin: NA ymin: NA xmax: NA ymax: NA
## geographic CRS: WGS 84
## # A tibble: 0 x 20
## # … with 20 variables: latitude <dbl>, longitude <dbl>, dupe_count <int>,
## #   site_code <chr>, local_well <chr>, state_well <chr>, wcr_number <chr>,
## #   well_use <dbl>, msmt_date <date>, msmt_agenc <dbl>, wsel <dbl>, dgbs <dbl>,
## #   rp_elevati <dbl>, gs_elevati <dbl>, msmt_metho <dbl>, msmt_issue <dbl>,
## #   msmt_comme <chr>, link_to_wd <chr>, name <chr>, geometry <GEOMETRY [°]>

make a variogram!

sj_dgw_vgm <- variogram(dgbs ~ 1, data = sj_depth)

plot(sj_dgw_vgm)

# Looks variogramish! Increasing variance as observations get further apart.
# but want to have countinuos function

sj_dgw_vgm_fit <- fit.variogram(sj_dgw_vgm, model = 
                                  vgm(nugget = 20, # nugget
                                      psill = 3000, # sill, no further increase in semivariance
                                      model = "Gau", # Gaussian
                                      range = 30) # range, distance at which no longer correlation 
                                ) 

# Plot them together:
plot(sj_dgw_vgm, sj_dgw_vgm_fit) 

sj_dgw_vgm_fit
##   model     psill    range
## 1   Nug  102.3052  0.00000
## 2   Gau 2843.6996 17.18188
# gives the estimate for the psill, nugget and range
# nugget = 102.3049
# psill = 2843.7017
# range = 17.18188

# how dependency changes while we move away from a point

Spatial kriging (interpolation)

# first make a grid over which well krige:

sj_grid <- st_bbox(sj_county) %>%  # find lat and long limits 
  st_as_stars(dx = 0.01, dy = 0.01) %>% # diff in x and y between points, increase the dx/dy, makes it less fine
  st_set_crs(4326) %>% 
  st_crop(sj_county) # crop this grid to the outline of what you put
## although coordinates are longitude/latitude, st_union assumes that they are planar
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
plot(sj_grid) # cool!

sj_grid
## stars object with 2 dimensions and 1 attribute
## attribute(s):
##     values     
##  Min.   :0     
##  1st Qu.:0     
##  Median :0     
##  Mean   :0     
##  3rd Qu.:0     
##  Max.   :0     
##  NA's   :1701  
## dimension(s):
##   from to   offset delta refsys point values x/y
## x    1 67 -121.585  0.01 WGS 84    NA   NULL [x]
## y    1 82  38.3003 -0.01 WGS 84    NA   NULL [y]
sj_dgw_krige <- krige(dgbs ~ 1, sj_depth, sj_grid, model = sj_dgw_vgm_fit)
## [using ordinary kriging]
# Initial plot of kriging outcome: 
plot(sj_dgw_krige) # prediction of what the dg is,plots the predicted values

# Convert it to a spatial data frame
krige_df <- as.data.frame(sj_dgw_krige) %>% 
  st_as_sf(coords = c("x","y")) %>% 
  drop_na(var1.pred)

st_crs(krige_df) <- 4326

# Then we can use ggplot: 
ggplot(data = krige_df) +
  geom_sf(aes(color = var1.pred)) +
  scale_color_gradient(low = "blue", high = "yellow") # woah